Nitrate-Nitrite (N)

Nitrate-Nitrite (N) Report

Code
# Bootstrap: install the `librarian` package helper if it is not available.
if (isFALSE(requireNamespace("librarian", quietly = TRUE))) {
  install.packages("librarian")
}

# Attach the packages this report uses (glue, here, skimr, ggplot2).
librarian::shelf(glue, here, skimr, ggplot2)

# Parameter this report is rendered for, taken from the document's `params`.
parameter_name <- params$parameter_name

# Cleaned observations, resolved relative to the project root via here().
data <- read.csv(here("data/df_cleaned.csv"))
apply param bounds
# Look up the allowed [min, max] range for this report's parameter.
bounds <- read.csv(
  here("parameter_bounds.csv"),
  stringsAsFactors = FALSE,
  strip.white = TRUE
)
lower_bound <- bounds$min[bounds$param == parameter_name]
upper_bound <- bounds$max[bounds$param == parameter_name]

# Flag rows of this parameter whose Value lies outside the bounds.
# If no bounds row matched, lower_bound/upper_bound are zero-length and this
# mask is logical(0); downstream code treats that as "nothing to remove".
filter_condition <- data$Parameter == parameter_name &
  (data$Value < lower_bound | data$Value > upper_bound)

# A missing Value yields NA in the mask. NA is neither below nor above the
# bounds, so treat it as "not flagged"; otherwise the row would be silently
# dropped by the filter and the dropped-row count would print as NA.
filter_condition <- !is.na(filter_condition) & filter_condition

# Remove the rows flagged by `filter_condition`.
# `filter_condition` is logical(0) when no bounds were found for the parameter;
# test for that explicitly instead of catching every error, so that unrelated
# failures (e.g. dplyr not installed) are not reported as "no rows removed".
if (length(filter_condition) == nrow(data)) {
  # `%in% TRUE` maps NA to FALSE: only rows positively flagged as out of
  # bounds are removed, and rows with a missing Value are kept.
  data <- dplyr::filter(data, !(filter_condition %in% TRUE))
  print(glue("{sum(filter_condition, na.rm = TRUE)} rows dropped as < {lower_bound} or > {upper_bound}"))
} else {
  print(glue("no rows removed"))
}
0 rows dropped as < 0 or > 8
apply param bounds
# Report how many rows were flagged as out of bounds. `na.rm = TRUE` keeps the
# count numeric when the mask contains NA (rows with a missing Value).
print(glue("{sum(filter_condition, na.rm = TRUE)} rows dropped as < {lower_bound} or > {upper_bound}"))
0 rows dropped as < 0 or > 8
write cleaned DataFrame to a file
# Persist the bounds-filtered data frame, without the row-name column.
write.csv(
  x = data,
  file = here("data/df_cleaned_02.csv"),
  row.names = FALSE
)
load data & skim
# Keep only the rows for this report's parameter.
# Standard `[` indexing is used instead of subset(): subset() evaluates its
# condition inside `data`, so a column named `parameter_name` would shadow the
# report variable. which() drops NA comparisons, matching subset()'s handling.
subset_data <- data[which(data$Parameter == parameter_name), , drop = FALSE]

# Print a per-column summary of the selected rows.
print(skimr::skim(subset_data))
── Data Summary ────────────────────────
                           Values     
Name                       subset_data
Number of rows             34293      
Number of columns          17         
_______________________               
Column type frequency:                
  character                4          
  numeric                  13         
________________________              
Group variables            None       

── Variable type: character ────────────────────────────────────────────────────
  skim_variable n_missing complete_rate min max empty n_unique whitespace
1 Source                0             1   3  21     0       11          0
2 Site                  0             1   1  28     0     1038          0
3 Parameter             0             1  19  19     0        1          0
4 Units                 0             1   2   4     0        3          0

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     n_missing complete_rate       mean         sd       p0
 1 ...1                      0        1      358547.    262910.    137209  
 2 Latitude                  0        1          26.2        0.719     24.4
 3 Longitude                 0        1         -80.4        0.616    -85.7
 4 Month                     0        1           6.71       3.44       1  
 5 Day                      28        0.999      12.4        7.63       1  
 6 Year                      0        1        2021.         2.10    2005  
 7 Value                     0        1           0.112      0.444      0  
 8 Sample.Depth           1365        0.960       2.83      21.5        0  
 9 Total.Depth           32972        0.0385      5.17       7.85       0.5
10 verbatimValue             0        1           0.112      0.444      0  
11 VerbatimLatitude          0        1          26.2        0.719     24.4
12 verbatimLongitude         0        1         -80.4        0.616    -85.7
13 Value_orig                0        1           0.112      0.444      0  
          p25         p50        p75      p100 hist 
 1 152369     160942      700847     709935    ▇▁▁▁▅
 2     25.8       26.1        26.7       30.8  ▂▇▂▁▁
 3    -80.4      -80.1       -80.1      -80.0  ▁▁▁▁▇
 4      4          7          10         12    ▇▅▅▆▇
 5      6         11          18         31    ▇▆▅▃▂
 6   2019       2021        2023       2024    ▁▁▁▅▇
 7      0.005      0.0097      0.054      7.71 ▇▁▁▁▁
 8      0.5        0.5         0.787   2494    ▇▁▁▁▁
 9      0.704      2.82        6.4       52.7  ▇▁▁▁▁
10      0.005      0.0097      0.054      7.71 ▇▁▁▁▁
11     25.8       26.1        26.7       30.8  ▂▇▂▁▁
12    -80.4      -80.1       -80.1      -80.0  ▁▁▁▁▇
13      0.005      0.0097      0.054      7.71 ▇▁▁▁▁
create params$parameter_name histogram
# Histogram of the parameter's values; bin counts are drawn on a log10 y-axis.
ggplot2::ggplot(data = subset_data, mapping = ggplot2::aes(x = Value)) +
  ggplot2::geom_histogram(bins = 30, fill = "blue", color = "black") +
  ggplot2::scale_y_log10() +
  ggplot2::labs(
    title = paste("Histogram of Values for", params$parameter_name),
    x = "Value",
    y = "Log Frequency"
  ) +
  ggplot2::theme_minimal()